---
title: "Table 2"
author: "Yingjie Fan"
date: "2023-06-02"
---
```{r setup, include=FALSE}
# Setup: load the packages used by the chunks below and set the project root.
# The workspace is deliberately not cleared here: rm(list = ls()) would delete
# the caller's objects when this chunk is run interactively, and it neither
# detaches packages nor resets options, so it provides no real isolation.
#install.packages(c("dplyr", "purrr", "data.table", "readr", "tidyr"))
library(dplyr)
library(data.table)
library(readr)
library(tidyr)
# Project root directory. Later chunks append "/Data/..." and "/Output/..." to
# it with paste0(), so give it without a trailing slash.
path <- "" # Insert path
```

Data Source for Replies:
All tweets were retrieved using the Twitter Academic API as follows:

1. Registered for API access at https://developer.twitter.com/en/products/twitter-api/academic-research
2. Secured API credentials and used the academictwitteR R package (https://github.com/cjbarrie/academictwitteR) to access the Twitter Academic API.
3. Set the authorization credentials with the `set_bearer()` function so that the bearer token is stored in the `.Renviron` file.

The exact code used to go from all retrieved tweets to the cleaned replies data is as follows:

```{r Obtain Replies from the 4 Chinese outlets}
# Archived retrieval call, kept commented out for reference only.
# get_all_tweets() and build_query() are not provided by the packages loaded in
# the setup chunk; presumably they come from academictwitteR (see the steps
# listed above) -- load it before uncommenting.
# NOTE(review): `query = query <- build_query(...)` also assigns a variable
# named `query` as a side effect; the inner `query <-` looks redundant.
# replies <-
#   get_all_tweets(
#     query = query <- build_query(users = c("CGTNOfficial","ChinaDaily","PDChina", "XHNews"),
#                      is_reply=TRUE),
#     start_tweets = "2013-01-23T00:00:00Z",
#     end_tweets = "2020-01-24T00:00:00Z",
#     file = "replies",
#     data_path = path,
#     n = 50000000
#   )
```

```{r Pre-processing Raw Data for Replies}
# Archived pre-processing, kept commented out for reference only. It turns the
# raw `replies` data into Data/Replies/replies_summary.csv, the file the
# Table 2 chunk reads.
# Steps: (1) count replies per (author_id, in_reply_to_user_id) pair,
# (2) count total replies per author_id, (3) join both with the totals read
# from vol_total_summary.csv (presumably per-outlet tweet totals),
# (4) rename the columns and save.
# The recode() calls map the four outlets' numeric user IDs to display names.
# NOTE(review): left_join(count, count_total) has no `by`, so it joins on all
# shared column names (here only author_id).
# NOTE(review): the colnames() assignment assumes the joined table has exactly
# five columns, i.e. vol_total_summary contributes a single column besides
# `username` -- confirm against that file.
# count = replies %>%
#   select(author_id,in_reply_to_user_id)%>%
#   group_by(author_id,in_reply_to_user_id)%>%
#   summarise(n=n()) %>%
#   mutate(author_id=dplyr::recode(author_id, `1115874631`='CGTN',`87775422`='China Daily',`303862998`='People\'s Daily',`487118986`='Xinhua'))%>%
#   mutate(in_reply_to_user_id=dplyr::recode(in_reply_to_user_id, `1115874631`='CGTN',`87775422`='China Daily',`303862998`='People\'s Daily',`487118986`='Xinhua'))
# 
# count_total = replies %>%
#   group_by(author_id)%>%
#   summarise(total_replies=n()) %>%
#   mutate(author_id=dplyr::recode(author_id, `1115874631`='CGTN',`87775422`='China Daily',`303862998`='People\'s Daily',`487118986`='Xinhua'))
# 
# count=left_join(count, count_total)
# 
# vol_total_summary = fread(paste0(path,"/Data/Tweets/vol_total_summary.csv"), drop = TRUE)
#                          
# count = left_join(count,vol_total_summary, by = c("author_id" = "username"))
# colnames(count)  = c("author_id","in_reply_to_user_id","count_replies_self","total_replies","total_tweets")
# 
# write.csv(count,paste0(path,"/Data/Replies/replies_summary.csv"))

```


```{r Table 2: Number of Replies}
# Build Table 2 from the pre-computed reply summary.
# The summary CSV was presumably written by write.csv() with default row
# names, so its first column is only a row index; drop it by position.
# `drop = 1L` states explicitly what the former `drop = TRUE` relied on
# (a logical TRUE being coerced to column index 1).
count <- fread(paste0(path, "/Data/Replies/replies_summary.csv"), drop = 1L)

count <- count %>%
  mutate(
    # Percentage of the author's replies that fall in this
    # (author_id, in_reply_to_user_id) pair.
    ratio_reply_self = round(100 * count_replies_self / total_replies, 2),
    # Replies as a percentage of total_tweets.
    ratio_total = round(100 * total_replies / total_tweets, 2)
  ) %>%
  select(
    author_id, in_reply_to_user_id, total_replies,
    ratio_reply_self, ratio_total
  ) %>%
  arrange(desc(ratio_reply_self))

# Export the four rows with the highest ratio_reply_self. head() returns at
# most four rows, whereas count[1:4, ] would pad the table with all-NA rows
# if fewer than four rows were available.
write.csv(head(count, 4L), file = paste0(path, "/Output/table2_replies.csv"))
```
